// This source file is part of the polarphp.org open source project

// Copyright (c) 2017 - 2018 polarphp software foundation
// Copyright (c) 2017 - 2018 zzu_softboy <zzu_softboy@163.com>
// Licensed under Apache License v2.0 with Runtime Library Exception

// See https://polarphp.org/LICENSE.txt for license information
// See https://polarphp.org/CONTRIBUTORS.txt for the list of polarphp project authors

// Created by polarboy on 2018/06/05.

#include "gtest/gtest.h"
#include "polarphp/utils/ConvertUtf.h"
#include "polarphp/basic/adt/ArrayRef.h"
#include "polarphp/basic/adt/StringRef.h"
#include "polarphp/basic/adt/SmallVector.h"

#include <string>
#include <vector>

using polar::basic::ArrayRef;
using polar::basic::StringRef;
using polar::basic::SmallVector;
using polar::utils::ConversionResult;
using polar::utils::ConversionFlags;

namespace utils = polar::utils;
namespace basic = polar::basic;

TEST(ConvertUtfTest, testConvertUTF16LittleEndianToUTF8String)
{
   // Src is the look of disapproval.
   static const char src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
   ArrayRef<char> ref(src, sizeof(src) - 1);
   std::string result;
   bool success = utils::convert_utf16_to_utf8_string(ref, result);
   EXPECT_TRUE(success);
   std::string expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
   EXPECT_EQ(expected, result);
}

TEST(ConvertUtfTest, testConvertUTF16BigEndianToUTF8String)
{
   // Src is the look of disapproval.
   static const char src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
   ArrayRef<char> ref(src, sizeof(src) - 1);
   std::string result;
   bool success = utils::convert_utf16_to_utf8_string(ref, result);
   EXPECT_TRUE(success);
   std::string expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
   EXPECT_EQ(expected, result);
}

TEST(ConvertUtfTest, testConvertUTF8ToUTF16String)
{
   // src is the look of disapproval.
   static const char src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
   StringRef ref(src, sizeof(src) - 1);
   SmallVector<utils::Utf16, 5> result;
   bool success = utils::convert_utf8_to_utf16_string(ref, result);
   EXPECT_TRUE(success);
   static const utils::Utf16 expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};
   ASSERT_EQ(3u, result.getSize());
   for (int index = 0, end = 3; index != end; ++index) {
      EXPECT_EQ(expected[index], result[index]);
   }
}

TEST(ConvertUtfTest, testOddLengthInput)
{
   std::string result;
   bool success = utils::convert_utf16_to_utf8_string(basic::make_array_ref("xxxxx", 5), result);
   EXPECT_FALSE(success);
}

TEST(ConvertUtfTest, testEmpty)
{
   std::string result;
   bool success = utils::convert_utf16_to_utf8_string(basic::ArrayRef<char>(std::nullopt), result);
   EXPECT_TRUE(success);
   EXPECT_TRUE(result.empty());
}

TEST(ConvertUtfTest, testHasUTF16BOM)
{
   bool hasBOM = utils::has_utf16_byte_order_mark(basic::make_array_ref("\xff\xfe", 2));
   EXPECT_TRUE(hasBOM);
   hasBOM = utils::has_utf16_byte_order_mark(basic::make_array_ref("\xfe\xff", 2));
   EXPECT_TRUE(hasBOM);
   hasBOM = utils::has_utf16_byte_order_mark(basic::make_array_ref("\xfe\xff ", 3));
   EXPECT_TRUE(hasBOM); // Don't care about odd lengths.
   hasBOM = utils::has_utf16_byte_order_mark(basic::make_array_ref("\xfe\xff\x00asdf", 6));
   EXPECT_TRUE(hasBOM);

   hasBOM = utils::has_utf16_byte_order_mark(std::nullopt);
   EXPECT_FALSE(hasBOM);
   hasBOM = utils::has_utf16_byte_order_mark(basic::make_array_ref("\xfe", 1));
   EXPECT_FALSE(hasBOM);
}

TEST(ConvertUtfTest, testUTF16WrappersForConvertUTF16ToUTF8String)
{
   // Src is the look of disapproval.
   static const char src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
   ArrayRef<utils::Utf16> srcRef = basic::make_array_ref((const utils::Utf16 *)src, 4);
   std::string result;
   bool success = utils::convert_utf16_to_utf8_string(srcRef, result);
   EXPECT_TRUE(success);
   std::string expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
   EXPECT_EQ(expected, result);
}

TEST(ConvertUtfTest, testConvertUTF8toWide)
{
   // Src is the look of disapproval.
   static const char src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
   std::wstring result;
   bool success = utils::convert_utf8_to_wide((const char*)src, result);
   EXPECT_TRUE(success);
   std::wstring expected(L"\x0ca0_\x0ca0");
   EXPECT_EQ(expected, result);
   result.clear();
   success = utils::convert_utf8_to_wide(StringRef(src, 7), result);
   EXPECT_TRUE(success);
   EXPECT_EQ(expected, result);
}

TEST(ConvertUtfTest, testConvertWideToUTF8)
{
   // Src is the look of disapproval.
   static const wchar_t src[] = L"\x0ca0_\x0ca0";
   std::string result;
   bool success = utils::convert_wide_to_utf8(src, result);
   EXPECT_TRUE(success);
   std::string expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
   EXPECT_EQ(expected, result);
}

namespace {


struct ConvertUTFResultContainer
{
   ConversionResult m_errorCode;
   std::vector<char32_t> m_unicodeScalars;

   ConvertUTFResultContainer(ConversionResult errorCode)
      : m_errorCode(errorCode)
   {}

   ConvertUTFResultContainer
   withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
               unsigned US2 = 0x110000, unsigned US3 = 0x110000,
               unsigned US4 = 0x110000, unsigned US5 = 0x110000,
               unsigned US6 = 0x110000, unsigned US7 = 0x110000)
   {
      ConvertUTFResultContainer result(*this);
      if (US0 != 0x110000)
         result.m_unicodeScalars.push_back(US0);
      if (US1 != 0x110000)
         result.m_unicodeScalars.push_back(US1);
      if (US2 != 0x110000)
         result.m_unicodeScalars.push_back(US2);
      if (US3 != 0x110000)
         result.m_unicodeScalars.push_back(US3);
      if (US4 != 0x110000)
         result.m_unicodeScalars.push_back(US4);
      if (US5 != 0x110000)
         result.m_unicodeScalars.push_back(US5);
      if (US6 != 0x110000)
         result.m_unicodeScalars.push_back(US6);
      if (US7 != 0x110000)
         result.m_unicodeScalars.push_back(US7);
      return result;
   }
};

std::pair<ConversionResult, std::vector<char32_t>>
convert_utf8_to_unicode_scalars_lenient(StringRef str)
{
   const utils::Utf8 *sourceStart = reinterpret_cast<const utils::Utf8 *>(str.getData());

   const utils::Utf8 *sourceNext = sourceStart;
   std::vector<utils::Utf32> decoded(str.getSize(), 0);
   utils::Utf32 *targetStart = decoded.data();

   auto errorCode =
         utils::convert_utf8_to_utf32(&sourceNext, sourceStart + str.getSize(), &targetStart,
                                      decoded.data() + decoded.size(), ConversionFlags::LenientConversion);

   decoded.resize(targetStart - decoded.data());

   return std::make_pair(errorCode, decoded);
}

std::pair<ConversionResult, std::vector<char32_t>>
convert_utf8_to_unicode_scalars_partial_lenient(StringRef str) {
   const utils::Utf8 *sourceStart = reinterpret_cast<const utils::Utf8 *>(str.getData());

   const utils::Utf8 *sourceNext = sourceStart;
   std::vector<utils::Utf32> decoded(str.getSize(), 0);
   utils::Utf32 *targetStart = decoded.data();

   auto errorCode = utils::convert_utf8_to_utf32_partial(
            &sourceNext, sourceStart + str.getSize(), &targetStart,
            decoded.data() + decoded.size(), ConversionFlags::LenientConversion);

   decoded.resize(targetStart - decoded.data());

   return std::make_pair(errorCode, decoded);
}

::testing::AssertionResult
check_convert_utf8_to_unicode_scalars(ConvertUTFResultContainer expected,
                                      StringRef str, bool partial = false) {
   ConversionResult errorCode;
   std::vector<char32_t> decoded;
   if (!partial) {
      std::tie(errorCode, decoded) = convert_utf8_to_unicode_scalars_lenient(str);
   } else {
      std::tie(errorCode, decoded) = convert_utf8_to_unicode_scalars_partial_lenient(str);
   }
   if (expected.m_errorCode != errorCode) {
      return ::testing::AssertionFailure() << "Expected error code "
                                           << (int)expected.m_errorCode << ", actual "
                                           << (int)errorCode;
   }
   if (expected.m_unicodeScalars != decoded)
      return ::testing::AssertionFailure()
            << "Expected lenient decoded result:\n"
            << ::testing::PrintToString(expected.m_unicodeScalars) << "\n"
            << "Actual result:\n" << ::testing::PrintToString(decoded);

   return ::testing::AssertionSuccess();
}

} // anonymous namespace

TEST(ConvertUtfTest, testUTF8ToUTF32Lenient)
{

   //
   // 1-byte sequences
   //

   // U+0041 LATIN CAPITAL LETTER A
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x0041), "\x41"));

   //
   // 2-byte sequences
   //

   // U+0283 LATIN SMALL LETTER ESH
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x0283),
                  "\xca\x83"));

   // U+03BA GREEK SMALL LETTER KAPPA
   // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
   // U+03C3 GREEK SMALL LETTER SIGMA
   // U+03BC GREEK SMALL LETTER MU
   // U+03B5 GREEK SMALL LETTER EPSILON
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK)
                  .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
                  "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));

   //
   // 3-byte sequences
   //

   // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
   // U+6587 CJK UNIFIED IDEOGRAPH-6587
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x4f8b, 0x6587),
                  "\xe4\xbe\x8b\xe6\x96\x87"));

   // U+D55C HANGUL SYLLABLE HAN
   // U+AE00 HANGUL SYLLABLE GEUL
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xd55c, 0xae00),
                  "\xed\x95\x9c\xea\xb8\x80"));

   // U+1112 HANGUL CHOSEONG HIEUH
   // U+1161 HANGUL JUNGSEONG A
   // U+11AB HANGUL JONGSEONG NIEUN
   // U+1100 HANGUL CHOSEONG KIYEOK
   // U+1173 HANGUL JUNGSEONG EU
   // U+11AF HANGUL JONGSEONG RIEUL
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK)
                  .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
                  "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
                  "\xe1\x86\xaf"));

   //
   // 4-byte sequences
   //

   // U+E0100 VARIATION SELECTOR-17
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x000E0100),
                  "\xf3\xa0\x84\x80"));

   //
   // First possible sequence of a certain length
   //

   // U+0000 NULL
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x0000),
                  StringRef("\x00", 1)));

   // U+0080 PADDING CHARACTER
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x0080),
                  "\xc2\x80"));

   // U+0800 SAMARITAN LETTER ALAF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x0800),
                  "\xe0\xa0\x80"));

   // U+10000 LINEAR B SYLLABLE B008 A
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x10000),
                  "\xf0\x90\x80\x80"));

   // U+200000 (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf8\x88\x80\x80\x80"));

   // U+4000000 (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfc\x84\x80\x80\x80\x80"));

   //
   // Last possible sequence of a certain length
   //

   // U+007F DELETE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x007f), "\x7f"));

   // U+07FF (unassigned)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x07ff),
                  "\xdf\xbf"));

   // U+FFFF (noncharacter)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xffff),
                  "\xef\xbf\xbf"));

   // U+1FFFFF (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf7\xbf\xbf\xbf"));

   // U+3FFFFFF (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfb\xbf\xbf\xbf\xbf"));

   // U+7FFFFFFF (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfd\xbf\xbf\xbf\xbf\xbf"));

   //
   // Other boundary conditions
   //

   // U+D7FF (unassigned)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xd7ff),
                  "\xed\x9f\xbf"));

   // U+E000 (private use)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xe000),
                  "\xee\x80\x80"));

   // U+FFFD REPLACEMENT CHARACTER
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfffd),
                  "\xef\xbf\xbd"));

   // U+10FFFF (noncharacter)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x10ffff),
                  "\xf4\x8f\xbf\xbf"));

   // U+110000 (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf4\x90\x80\x80"));

   //
   // Unexpected continuation bytes
   //

   // A sequence of unexpected continuation bytes that don't follow a first
   // byte, every byte is a maximal subpart.

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\x80\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xbf\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\x80\xbf\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\x80\xbf\x80\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\x80\xbf\x82\xbf\xaa"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xaa\xb0\xbb\xbf\xaa\xa0"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));

   // All continuation bytes (0x80--0xbf).
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
                  "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
                  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
                  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));

   //
   // Lonely start bytes
   //

   // Start bytes of 2-byte sequences (0xc0--0xdf).
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
                  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020),
                  "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
                  "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
                  "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
                  "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));

   // Start bytes of 3-byte sequences (0xe0--0xef).
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020),
                  "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
                  "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));

   // Start bytes of 4-byte sequences (0xf0--0xf7).
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
                               0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020),
                  "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));

   // Start bytes of 5-byte sequences (0xf8--0xfb).
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf8\xf9\xfa\xfb"));

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020),
                  "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));

   // Start bytes of 6-byte sequences (0xfc--0xfd).
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xfc\xfd"));

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
                  "\xfc\x20\xfd\x20"));

   //
   // Other bytes (0xc0--0xc1, 0xfe--0xff).
   //

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xc0"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xc1"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xfe"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xff"));

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xc0\xc1\xfe\xff"));

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfe\xfe\xff\xff"));

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfe\x80\x80\x80\x80\x80"));

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xff\x80\x80\x80\x80\x80"));

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
                               0xfffd, 0x0020, 0xfffd, 0x0020),
                  "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));

   //
   // Sequences with one continuation byte missing
   //

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xc2"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xdf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xe0\xa0"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xe0\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xe1\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xec\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xed\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xed\x9f"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xee\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xef\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xf0\x90\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xf0\xbf\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xf1\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xf3\xbf\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xf4\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xf4\x8f\xbf"));

   // Overlong sequences with one trailing byte missing.
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xc0"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xc1"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xe0\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xe0\x9f"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xf0\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xf0\x8f\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf8\x80\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfc\x80\x80\x80\x80"));

   // Sequences that represent surrogates with one trailing byte missing.
   // High surrogates
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xed\xa0"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xed\xac"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xed\xaf"));
   // Low surrogates
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xed\xb0"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xed\xb4"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xed\xbf"));

   // Ill-formed 4-byte sequences.
   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
   // U+1100xx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xf4\x90\x80"));
   // U+13FBxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xf4\xbf\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xf5\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xf6\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xf7\x80\x80"));
   // U+1FFBxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xf7\xbf\xbf"));

   // Ill-formed 5-byte sequences.
   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
   // U+2000xx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf8\x88\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf8\xbf\xbf\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf9\x80\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfa\x80\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfb\x80\x80\x80"));
   // U+3FFFFxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfb\xbf\xbf\xbf"));

   // Ill-formed 6-byte sequences.
   // 1111110u 10uuuuuu 10uzzzzz 10zzzyyyy 10yyyyxx 10xxxxxx
   // U+40000xx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfc\x84\x80\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfc\xbf\xbf\xbf\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfd\x80\x80\x80\x80"));
   // U+7FFFFFxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfd\xbf\xbf\xbf\xbf"));

   //
   // Sequences with two continuation bytes missing
   //

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xf0\x90"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xf0\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xf1\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xf3\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xf4\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd),
                  "\xf4\x8f"));

   // Overlong sequences with two trailing byte missing.
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xe0"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xf0\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xf0\x8f"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xf8\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfc\x80\x80\x80"));

   // Sequences that represent surrogates with two trailing bytes missing.
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xed"));

   // Ill-formed 4-byte sequences.
   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
   // U+110yxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xf4\x90"));
   // U+13Fyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xf4\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xf5\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xf6\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xf7\x80"));
   // U+1FFyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xf7\xbf"));

   // Ill-formed 5-byte sequences.
   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
   // U+200yxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xf8\x88\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xf8\xbf\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xf9\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xfa\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xfb\x80\x80"));
   // U+3FFFyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xfb\xbf\xbf"));

   // Ill-formed 6-byte sequences.
   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
   // U+4000yxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfc\x84\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfc\xbf\xbf\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfd\x80\x80\x80"));
   // U+7FFFFyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfd\xbf\xbf\xbf"));

   //
   // Sequences with three continuation bytes missing
   //

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xf0"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xf1"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xf2"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xf3"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xf4"));

   // Broken overlong sequences.
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xf0"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xf8\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xfc\x80\x80"));

   // Ill-formed 4-byte sequences.
   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
   // U+14yyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xf5"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xf6"));
   // U+1Cyyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xf7"));

   // Ill-formed 5-byte sequences.
   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
   // U+20yyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xf8\x88"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xf8\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xf9\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xfa\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xfb\x80"));
   // U+3FCyyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xfb\xbf"));

   // Ill-formed 6-byte sequences.
   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
   // U+400yyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xfc\x84\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xfc\xbf\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xfd\x80\x80"));
   // U+7FFCyyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xfd\xbf\xbf"));

   //
   // Sequences with four continuation bytes missing
   //

   // Ill-formed 5-byte sequences.
   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
   // U+uzyyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xf8"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xf9"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xfa"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xfb"));
   // U+3zyyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xfb"));

   // Broken overlong sequences.
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xf8"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xfc\x80"));

   // Ill-formed 6-byte sequences.
   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
   // U+uzzyyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xfc\x84"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xfc\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xfd\x80"));
   // U+7Fzzyyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xfd\xbf"));

   //
   // Sequences with five continuation bytes missing
   //

   // Ill-formed 6-byte sequences.
   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
   // U+uzzyyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xfc"));
   // U+uuzzyyxx (invalid)
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd), "\xfd"));

   //
   // Consecutive sequences with trailing bytes missing
   //

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xc0" "\xe0\x80" "\xf0\x80\x80"
                  "\xf8\x80\x80\x80"
                  "\xfc\x80\x80\x80\x80"
                  "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
                  "\xfb\xbf\xbf\xbf"
                  "\xfd\xbf\xbf\xbf\xbf"));

   //
   // Overlong UTF-8 sequences
   //

   // U+002F SOLIDUS
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x002f), "\x2f"));

   // Overlong sequences of the above.
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xc0\xaf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xe0\x80\xaf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf0\x80\x80\xaf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf8\x80\x80\x80\xaf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfc\x80\x80\x80\x80\xaf"));

   // U+0000 NULL
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x0000),
                  StringRef("\x00", 1)));

   // Overlong sequences of the above.
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xc0\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xe0\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf0\x80\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf8\x80\x80\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfc\x80\x80\x80\x80\x80"));

   // Other overlong sequences.
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xc0\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xc1\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal).withScalars(0xfffd, 0xfffd),
                  "\xc1\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xe0\x9f\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xed\xa0\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xed\xbf\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf0\x8f\x80\x80"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf0\x8f\xbf\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xf8\x87\xbf\xbf\xbf"));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xfc\x83\xbf\xbf\xbf\xbf"));

   //
   // Isolated surrogates
   //

   // Unicode 6.3.0:
   //
   //    D71.  High-surrogate code point: A Unicode code point in the range
   //    U+D800 to U+DBFF.
   //
   //    D73.  Low-surrogate code point: A Unicode code point in the range
   //    U+DC00 to U+DFFF.

   // Note: U+E0100 is <DB40 DD00> in UTF16.

   // High surrogates

   // U+D800
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xed\xa0\x80"));

   // U+DB40
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xed\xac\xa0"));

   // U+DBFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xed\xaf\xbf"));

   // Low surrogates

   // U+DC00
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xed\xb0\x80"));

   // U+DD00
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xed\xb4\x80"));

   // U+DFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd),
                  "\xed\xbf\xbf"));

   // Surrogate pairs

   // U+D800 U+DC00
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xed\xa0\x80\xed\xb0\x80"));

   // U+D800 U+DD00
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xed\xa0\x80\xed\xb4\x80"));

   // U+D800 U+DFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xed\xa0\x80\xed\xbf\xbf"));

   // U+DB40 U+DC00
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xed\xac\xa0\xed\xb0\x80"));

   // U+DB40 U+DD00
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xed\xac\xa0\xed\xb4\x80"));

   // U+DB40 U+DFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xed\xac\xa0\xed\xbf\xbf"));

   // U+DBFF U+DC00
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xed\xaf\xbf\xed\xb0\x80"));

   // U+DBFF U+DD00
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xed\xaf\xbf\xed\xb4\x80"));

   // U+DBFF U+DFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceIllegal)
                  .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
                  "\xed\xaf\xbf\xed\xbf\xbf"));

   //
   // Noncharacters
   //

   // Unicode 6.3.0:
   //
   //    D14.  Noncharacter: A code point that is permanently reserved for
   //    internal use and that should never be interchanged. Noncharacters
   //    consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 1016)
   //    and the values U+FDD0..U+FDEF.

   // U+FFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfffe),
                  "\xef\xbf\xbe"));

   // U+FFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xffff),
                  "\xef\xbf\xbf"));

   // U+1FFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x1fffe),
                  "\xf0\x9f\xbf\xbe"));

   // U+1FFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x1ffff),
                  "\xf0\x9f\xbf\xbf"));

   // U+2FFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x2fffe),
                  "\xf0\xaf\xbf\xbe"));

   // U+2FFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x2ffff),
                  "\xf0\xaf\xbf\xbf"));

   // U+3FFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x3fffe),
                  "\xf0\xbf\xbf\xbe"));

   // U+3FFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x3ffff),
                  "\xf0\xbf\xbf\xbf"));

   // U+4FFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x4fffe),
                  "\xf1\x8f\xbf\xbe"));

   // U+4FFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x4ffff),
                  "\xf1\x8f\xbf\xbf"));

   // U+5FFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x5fffe),
                  "\xf1\x9f\xbf\xbe"));

   // U+5FFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x5ffff),
                  "\xf1\x9f\xbf\xbf"));

   // U+6FFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x6fffe),
                  "\xf1\xaf\xbf\xbe"));

   // U+6FFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x6ffff),
                  "\xf1\xaf\xbf\xbf"));

   // U+7FFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x7fffe),
                  "\xf1\xbf\xbf\xbe"));

   // U+7FFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x7ffff),
                  "\xf1\xbf\xbf\xbf"));

   // U+8FFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x8fffe),
                  "\xf2\x8f\xbf\xbe"));

   // U+8FFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x8ffff),
                  "\xf2\x8f\xbf\xbf"));

   // U+9FFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x9fffe),
                  "\xf2\x9f\xbf\xbe"));

   // U+9FFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x9ffff),
                  "\xf2\x9f\xbf\xbf"));

   // U+AFFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xafffe),
                  "\xf2\xaf\xbf\xbe"));

   // U+AFFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xaffff),
                  "\xf2\xaf\xbf\xbf"));

   // U+BFFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xbfffe),
                  "\xf2\xbf\xbf\xbe"));

   // U+BFFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xbffff),
                  "\xf2\xbf\xbf\xbf"));

   // U+CFFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xcfffe),
                  "\xf3\x8f\xbf\xbe"));

   // U+CFFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xcfffF),
                  "\xf3\x8f\xbf\xbf"));

   // U+DFFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xdfffe),
                  "\xf3\x9f\xbf\xbe"));

   // U+DFFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xdffff),
                  "\xf3\x9f\xbf\xbf"));

   // U+EFFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xefffe),
                  "\xf3\xaf\xbf\xbe"));

   // U+EFFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xeffff),
                  "\xf3\xaf\xbf\xbf"));

   // U+FFFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xffffe),
                  "\xf3\xbf\xbf\xbe"));

   // U+FFFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfffff),
                  "\xf3\xbf\xbf\xbf"));

   // U+10FFFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x10fffe),
                  "\xf4\x8f\xbf\xbe"));

   // U+10FFFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x10ffff),
                  "\xf4\x8f\xbf\xbf"));

   // U+FDD0
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdd0),
                  "\xef\xb7\x90"));

   // U+FDD1
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdd1),
                  "\xef\xb7\x91"));

   // U+FDD2
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdd2),
                  "\xef\xb7\x92"));

   // U+FDD3
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdd3),
                  "\xef\xb7\x93"));

   // U+FDD4
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdd4),
                  "\xef\xb7\x94"));

   // U+FDD5
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdd5),
                  "\xef\xb7\x95"));

   // U+FDD6
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdd6),
                  "\xef\xb7\x96"));

   // U+FDD7
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdd7),
                  "\xef\xb7\x97"));

   // U+FDD8
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdd8),
                  "\xef\xb7\x98"));

   // U+FDD9
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdd9),
                  "\xef\xb7\x99"));

   // U+FDDA
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdda),
                  "\xef\xb7\x9a"));

   // U+FDDB
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfddb),
                  "\xef\xb7\x9b"));

   // U+FDDC
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfddc),
                  "\xef\xb7\x9c"));

   // U+FDDD
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfddd),
                  "\xef\xb7\x9d"));

   // U+FDDE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdde),
                  "\xef\xb7\x9e"));

   // U+FDDF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfddf),
                  "\xef\xb7\x9f"));

   // U+FDE0
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfde0),
                  "\xef\xb7\xa0"));

   // U+FDE1
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfde1),
                  "\xef\xb7\xa1"));

   // U+FDE2
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfde2),
                  "\xef\xb7\xa2"));

   // U+FDE3
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfde3),
                  "\xef\xb7\xa3"));

   // U+FDE4
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfde4),
                  "\xef\xb7\xa4"));

   // U+FDE5
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfde5),
                  "\xef\xb7\xa5"));

   // U+FDE6
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfde6),
                  "\xef\xb7\xa6"));

   // U+FDE7
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfde7),
                  "\xef\xb7\xa7"));

   // U+FDE8
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfde8),
                  "\xef\xb7\xa8"));

   // U+FDE9
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfde9),
                  "\xef\xb7\xa9"));

   // U+FDEA
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdea),
                  "\xef\xb7\xaa"));

   // U+FDEB
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdeb),
                  "\xef\xb7\xab"));

   // U+FDEC
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdec),
                  "\xef\xb7\xac"));

   // U+FDED
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfded),
                  "\xef\xb7\xad"));

   // U+FDEE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdee),
                  "\xef\xb7\xae"));

   // U+FDEF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdef),
                  "\xef\xb7\xaf"));

   // U+FDF0
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdf0),
                  "\xef\xb7\xb0"));

   // U+FDF1
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdf1),
                  "\xef\xb7\xb1"));

   // U+FDF2
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdf2),
                  "\xef\xb7\xb2"));

   // U+FDF3
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdf3),
                  "\xef\xb7\xb3"));

   // U+FDF4
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdf4),
                  "\xef\xb7\xb4"));

   // U+FDF5
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdf5),
                  "\xef\xb7\xb5"));

   // U+FDF6
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdf6),
                  "\xef\xb7\xb6"));

   // U+FDF7
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdf7),
                  "\xef\xb7\xb7"));

   // U+FDF8
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdf8),
                  "\xef\xb7\xb8"));

   // U+FDF9
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdf9),
                  "\xef\xb7\xb9"));

   // U+FDFA
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdfa),
                  "\xef\xb7\xba"));

   // U+FDFB
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdfb),
                  "\xef\xb7\xbb"));

   // U+FDFC
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdfc),
                  "\xef\xb7\xbc"));

   // U+FDFD
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdfd),
                  "\xef\xb7\xbd"));

   // U+FDFE
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdfe),
                  "\xef\xb7\xbe"));

   // U+FDFF
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0xfdff),
                  "\xef\xb7\xbf"));
}

TEST(ConvertUtfTest, testUTF8ToUTF32PartialLenient)
{
   // U+0041 LATIN CAPITAL LETTER A
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::ConversionOK).withScalars(0x0041),
                  "\x41", true));

   //
   // Sequences with one continuation byte missing
   //

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xc2", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xdf", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xe0\xa0", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xe0\xbf", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xe1\x80", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xec\xbf", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xed\x80", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xed\x9f", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xee\x80", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xef\xbf", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xf0\x90\x80", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xf0\xbf\xbf", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xf1\x80\x80", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xf3\xbf\xbf", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xf4\x80\x80", true));
   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted),
                  "\xf4\x8f\xbf", true));

   EXPECT_TRUE(check_convert_utf8_to_unicode_scalars(
                  ConvertUTFResultContainer(ConversionResult::SourceExhausted).withScalars(0x0041),
                  "\x41\xc2", true));
}
